%macro get_sched_g;
	/*
	 * get_sched_g: writes rlib.sched_g_link, holding one row per
	 * child_tin / parent_tin / year with the largest own_share seen
	 * for that combination.
	 * Reads the macro variable logdir for the log location. The SAS log is
	 * redirected to get_sched_g.txt in that directory while the macro runs.
	 */
			
	proc printto new log = "&logdir./get_sched_g.txt";
	run;
	
	/*
	 * The statement that opens the query below is redacted in this view.
	 * NOTE(review): it presumably connects to iq and creates work.sample
	 * from the pass-through query, because the later steps read sample and
	 * the block ends with disconnect from iq. Confirm against the full file.
	 *
	 * The query inner-joins three sources (a, b, c), keeps rows whose filter
	 * column lies between 20190000 and 20201231, derives year as
	 * floor(value/10000), and excludes parent_tin 2592233504.
	 * NOTE(review): the filter column looks like a yyyymmdd integer, and the
	 * reason for excluding that one parent_tin is not visible here. Confirm.
	 */
	proc sql;
		[redacted]
			(
				
			select 
				a.[redacted] as child_flr_tin, 
				a.[redacted] as child_corp_tin,
				c.[redacted] as parent_tin, 
				c.[redacted] as own_share,
				floor(a.[redacted]/10000) as year
			from [redacted] as a
				INNER JOIN [redacted] as b
					on (a.[redacted] = b.[redacted]
		    				and a.[redacted] = b.[redacted]
		    				and a.[redacted] = b.[redacted])
				INNER JOIN [redacted] as c
					on (a.[redacted] = c.[redacted]
		   			 	and a.[redacted] = c.[redacted]
		    				and a.[redacted] = c.[redacted]
		    				and b.[redacted] = c.[redacted])
				WHERE a.[redacted] between 20190000 and 20201231
				and parent_tin ~= 2592233504
		);
		disconnect from iq;
		quit;
		
		
		/*
		 * Collapse the two child TIN columns into child_tin. MAX ignores
		 * missing arguments, so a single populated column is used as is.
		 * own_share of 0 or missing is replaced by 1, and values above 1 are
		 * divided by 100.
		 * NOTE(review): the division presumably converts percentages to
		 * fractions. Confirm.
		 */
		data sample;
			set sample;
			child_tin = max(child_flr_tin, child_corp_tin);
			drop child_flr_tin child_corp_tin;
			if own_share = 0 | own_share = . then own_share = 1;
			if own_share > 1 then own_share = own_share/100;
		run;
		
		/*
		 * NWAY keeps only the full child_tin * parent_tin * year combination.
		 * The output holds the maximum own_share per combination.
		 */
		proc means data = sample nway noprint;
			class child_tin parent_tin year;
			output out = rlib.sched_g_link(drop=_type_ _freq_)
				max(own_share) = own_share;
		run;
		
		
	/* Restore the default log destination. */
	proc printto;
	run;

	
%mend;





%macro get_f851;
	/*
	 * get_f851: pulls distinct child_tin / parent_tin / year rows through
	 * the iq ODBC connection and stores them in rlib.f851.
	 * Reads the macro variables logdir (log directory), uid and pwd (ODBC
	 * credentials). The SAS log is redirected to get_f851.txt in the log
	 * directory while the macro runs.
	 */
			
	proc printto new log = "&logdir./get_f851.txt";
	run;
	
	proc sql;
		connect to odbc as iq
			(dsn = cdwsas_titan uid = &uid pwd = &pwd
				insertbuff = 18000 connection = global);
			/* Session-level DBMS options, set only for this connection. */
			execute(set temporary option escape_character = on) by iq;
			execute(set temporary option quoted_identifier = 'off') by iq;
			/*
			 * year is derived as floor(period/100), and only periods between
			 * 201900 and 202012 are kept.
			 * NOTE(review): the period column looks like a yyyymm integer.
			 * Confirm.
			 * The query text carries no semicolon inside the parentheses: SAS
			 * reads a semicolon as the end of a SAS statement, so the
			 * statement is terminated only after the closing parenthesis.
			 */
			create table rlib.f851 as
			select * from connection to iq
			(
				
			select distinct
				[redacted] as child_tin,
				[redacted] as parent_tin,
				floor([redacted]/100) as year
			from [redacted]
			where [redacted] between 201900 and 202012
				and [redacted] ~= [redacted]
				and [redacted] ~= [redacted]
		);
		disconnect from iq;
		quit;
		
		
		
		
	/* Restore the default log destination. */
	proc printto;
	run;

	
%mend;





%macro get_f1065_k1;
	/*
	 * get_f1065_k1: builds rlib.f1065_k1 with parent_tin, child_tin,
	 * own_share and year.
	 *   1. Pull K-1 rows for 2019 and 2020 through the iq ODBC connection.
	 *   2. Keep the last row of each child_tin / year / parent_tin group.
	 *   3. Match to rlib.f1065 (firm_id renamed to child_tin) and derive
	 *      own_share.
	 * Reads the macro variables logdir, uid and pwd. The SAS log is
	 * redirected to get_f1065_k1.txt in the log directory while the macro
	 * runs.
	 */

	proc printto new log = "&logdir./get_f1065_k1.txt";
	run;

	proc sql;
		connect to odbc as iq
			(dsn = cdwsas_titan uid = &uid pwd = &pwd
				insertbuff = 18000 connection = global);
		execute(set temporary option escape_character = on) by iq;
		execute(set temporary option quoted_identifier = 'off') by iq;

		/*
		 * The DBMS returns the rows already ordered. The BY-group step
		 * after this query reads them in that order.
		 * NOTE(review): the order by columns are redacted. They presumably
		 * correspond to child_tin and parent_tin. Confirm.
		 */
		create table k1 as
		select * from connection to iq
		(
			select
				[redacted] as parent_tin,
				[redacted] as child_tin,
				[redacted] as share_begin,
				[redacted] as share_end,
				[redacted] as ord_inc_k1,
				[redacted] as year,
				[redacted] as edate
			from [redacted]
			where [redacted] in(2019,2020)
				and [redacted] in(5,6)
			order by [redacted], year, [redacted], edate
		);
		disconnect from iq;
	quit;

	/* Keep only the final row of every child_tin / year / parent_tin group. */
	data k1(drop = edate);
		set k1;
		by child_tin year parent_tin;
		if not last.parent_tin then delete;
	run;

	/*
	 * Keep K-1 rows that also have an entity record in rlib.f1065.
	 * own_share starts as share_begin and falls back to share_end when it is
	 * 0 or missing. Values above 1 are divided by 100. If it is still 0 or
	 * missing, the ratio ord_inc_k1 / ord_inc_entity is used, but only when
	 * both incomes share a sign and the entity amount is non-zero and at
	 * least as large in absolute value. Rows left without a share are
	 * dropped.
	 */
	data rlib.f1065_k1(keep = parent_tin child_tin own_share year);
		merge k1(in = in_k1)
			rlib.f1065(in = in_f1065 rename = (firm_id = child_tin));
		by child_tin year;
		if not (in_k1 and in_f1065) then delete;

		own_share = share_begin;
		if own_share in (0, .) then own_share = share_end;
		if own_share > 1 then own_share = own_share / 100;

		if own_share in (0, .)
			and sign(ord_inc_k1) = sign(ord_inc_entity)
			and abs(ord_inc_entity) >= abs(ord_inc_k1)
			and abs(ord_inc_entity) > 0
		then own_share = ord_inc_k1 / ord_inc_entity;

		if own_share in (0, .) then delete;
	run;

	/* Restore the default log destination. */
	proc printto;
	run;

%mend;





%macro assemble_links;
	/*
	 * assemble_links: stacks rlib.f1065_k1, rlib.f851 and rlib.sched_g_link,
	 * keeps the largest own_share per child_tin / year / parent_tin, rescales
	 * shares so they total at most one per child_tin / year, and writes the
	 * result to rlib.links.
	 * Reads the macro variable logdir. The SAS log is redirected to
	 * assemble_links.txt in that directory while the macro runs.
	 */

	proc printto new log = "&logdir./assemble_links.txt";
	run;

	/* Rows with a missing own_share get a share of 1. */
	data appended;
		set rlib.f1065_k1 rlib.f851 rlib.sched_g_link;
		if own_share = . then own_share = 1;
	run;

	/* One row per link, carrying the maximum share across sources. */
	proc means data = appended nway noprint;
		class child_tin year parent_tin;
		var own_share;
		output out = links(drop = _type_ _freq_) max = own_share_link;
	run;

	/* Total share per child_tin and year, used for the normalization. */
	proc means data = links nway noprint;
		class child_tin year;
		var own_share_link;
		output out = tot_own(drop = _type_ _freq_) sum = tot_own;
	run;

	/* Scale shares down wherever the total for a child_tin / year exceeds one. */
	data rlib.links(drop = tot_own);
		merge links tot_own;
		by child_tin year;
		if tot_own > 1 then own_share_link = own_share_link / tot_own;
	run;

	/*
	 * Diagnostic: flag totals above 1.01 and print the mean of the flag,
	 * which is the share of child_tin / year groups above that threshold.
	 */
	data tot_own;
		set tot_own;
		if tot_own > 1.01 then too_big = 1;
		else too_big = 0;
	run;

	proc means data = tot_own mean;
		var too_big;
	run;

	/* Restore the default log destination. */
	proc printto;
	run;

%mend;



